#! /bin/sh

# Check if the build assigned to copr worker is actually running or not.
# If it is not running, then this is some bug related to:
# https://pagure.io/copr/copr/issue/987

# Prefix shared by the redis keys that are scanned below.
key_prefix=copr:backend:vm_instance:hset::

# Host that is asked about build state; the branch is picked when the
# template is rendered.
{% if devel %}
hostname=copr.stg.fedoraproject.org
{% else %}
hostname=copr.fedoraproject.org
{% endif %}

# Put every matching redis key into the positional parameters.  The command
# substitution is intentionally unquoted so that each key becomes its own word.
set -- $(redis-cli --scan --pattern "${key_prefix}*")

# Iterate over the redis keys held in the positional parameters.  For each
# key that has a build_id and has been in use for more than half an hour,
# ask the frontend for the build state; when the build is finished, canceled
# or deleted, shut the VM down over ssh.
for worker; do
    build_id=$(redis-cli hget "$worker" build_id)
    test -n "$build_id" || continue

    since=$(redis-cli hget "$worker" in_use_since)
    # race, hopefully - the in_use_since field is not yet set even though the
    # worker is assigned to build
    test -n "$since" || continue

    # don't kill younger VMs than half an hour.  The timestamp is handed over
    # as an argument (not pasted into the python source), and any failure of
    # the check (unparsable value, interpreter problem) skips the worker
    # instead of silently treating it as old enough.
    python3 -c 'import sys, time
sys.exit(0 if time.time() - float(sys.argv[1]) > 1800 else 1)' "$since" \
        || continue

    # now check what's up with the build
    url="https://$hostname/api_3/build/$build_id/"
    if output=$(curl --fail "$url" 2>/dev/null); then
        state=$(printf '%s\n' "$output" \
            | python3 -c 'import sys, json; print(json.load(sys.stdin)["state"])')
    else
        # curl --fail said server error, but it still can be 404 (deleted
        # build); fetch the body again without --fail to tell them apart
        case $(curl "$url" 2>/dev/null) in
            *'does not exist'*) state=deleted ;;
            *) continue ;;  # skip normal curl failures, fe is just not available
        esac
    fi

    case "$state" in
        running) continue ;;
        cancel*|succeeded|failed|deleted) ;; # go to delete
        *)
            # unknown or empty state: report on stderr and leave the VM alone
            echo >&2 "$worker state=$state build_id=$build_id  skip"
            continue
            ;;
    esac

    ip=$(redis-cli hget "$worker" vm_ip)
    if test -z "$ip"; then
        # nothing to connect to; avoid running ssh against "root@"
        echo >&2 "$worker has no vm_ip, build_id=$build_id  skip"
        continue
    fi

    echo >&2 "REMOVING $since -- $worker"
    # best effort: the result of the shutdown is deliberately ignored.
    # Portable redirection is used because '&>' is not POSIX sh syntax.
    timeout 5 ssh "root@$ip" shutdown -h now >/dev/null 2>&1
done
